library(tidyverse)
#> ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
#> ✔ dplyr 1.1.2 ✔ readr 2.1.4
#> ✔ forcats 1.0.0 ✔ stringr 1.5.0
#> ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
#> ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
#> ✔ purrr 1.0.1
#> ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
#> ✖ dplyr::filter() masks stats::filter()
#> ✖ dplyr::lag() masks stats::lag()
#> ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tidylog)
#>
#> Attaching package: 'tidylog'
#>
#> The following objects are masked from 'package:dplyr':
#>
#> add_count, add_tally, anti_join, count, distinct, distinct_all,
#> distinct_at, distinct_if, filter, filter_all, filter_at, filter_if,
#> full_join, group_by, group_by_all, group_by_at, group_by_if,
#> inner_join, left_join, mutate, mutate_all, mutate_at, mutate_if,
#> relocate, rename, rename_all, rename_at, rename_if, rename_with,
#> right_join, sample_frac, sample_n, select, select_all, select_at,
#> select_if, semi_join, slice, slice_head, slice_max, slice_min,
#> slice_sample, slice_tail, summarise, summarise_all, summarise_at,
#> summarise_if, summarize, summarize_all, summarize_at, summarize_if,
#> tally, top_frac, top_n, transmute, transmute_all, transmute_at,
#> transmute_if, ungroup
#>
#> The following objects are masked from 'package:tidyr':
#>
#> drop_na, fill, gather, pivot_longer, pivot_wider, replace_na,
#> spread, uncount
#>
#> The following object is masked from 'package:stats':
#>
#> filter
library(RColorBrewer)
library(viridis)
#> Loading required package: viridisLite
library(broom)
library(rgdal)
#> Loading required package: sp
#> Please note that rgdal will be retired during 2023,
#> plan transition to sf/stars/terra functions using GDAL and PROJ
#> at your earliest convenience.
#> See https://r-spatial.org/r/2022/04/12/evolution.html and https://github.com/r-spatial/evolution
#> rgdal: version: 1.6-6, (SVN revision 1201)
#> Geospatial Data Abstraction Library extensions to R successfully loaded
#> Loaded GDAL runtime: GDAL 3.4.2, released 2022/03/08
#> Path to GDAL shared files: /Users/maxlindmark/Library/Caches/org.R-project.R/R/renv/cache/v5/R-4.2/x86_64-apple-darwin17.0/rgdal/1.6-6/05c7c91191407d54da92816fb849ab29/rgdal/gdal
#> GDAL binary built with GEOS: FALSE
#> Loaded PROJ runtime: Rel. 8.2.1, January 1st, 2022, [PJ_VERSION: 821]
#> Path to PROJ shared files: /Users/maxlindmark/Library/Caches/org.R-project.R/R/renv/cache/v5/R-4.2/x86_64-apple-darwin17.0/rgdal/1.6-6/05c7c91191407d54da92816fb849ab29/rgdal/proj
#> PROJ CDN enabled: FALSE
#> Linking to sp version:1.6-0
#> To mute warnings of possible GDAL/OSR exportToProj4() degradation,
#> use options("rgdal_show_exportToProj4_warnings"="none") before loading sp or rgdal.
library(ggmap)
#> ℹ Google's Terms of Service: <https://mapsplatform.google.com>
#> Stadia Maps' Terms of Service: <https://stadiamaps.com/terms-of-service/>
#> OpenStreetMap's Tile Usage Policy: <https://operations.osmfoundation.org/policies/tiles/>
#> ℹ Please cite ggmap if you use it! Use `citation("ggmap")` for details.
library(sf)
#> Linking to GEOS 3.10.2, GDAL 3.4.2, PROJ 8.2.1; sf_use_s2() is TRUE
library(ggridges)
library(rnaturalearth)
library(rnaturalearthdata)
#>
#> Attaching package: 'rnaturalearthdata'
#>
#> The following object is masked from 'package:rnaturalearth':
#>
#> countries110
library(forcats)
# devtools::install_github("seananderson/ggsidekick") # not on CRAN
library(ggsidekick); theme_set(theme_sleek())Explore cleaned data of back-calculated length-at-age of perch from gillnet-survey data from SLU databases
Load libraries
Read data
d <- readr::read_csv("https://raw.githubusercontent.com/maxlindmark/perch-growth/master/data/for-analysis/dat.csv")
#> Rows: 338460 Columns: 12
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: ","
#> chr (6): age_ring, area, gear, ID, sex, keep
#> dbl (6): length_mm, age_bc, age_catch, catch_year, cohort, final_length
#>
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
glimpse(d)
#> Rows: 338,460
#> Columns: 12
#> $ length_mm <dbl> 62, 115, 153, 168, 184, 51, 110, 143, 155, 163, 71, 112, …
#> $ age_bc <dbl> 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 1, 2, 3, 4, 5, …
#> $ age_catch <dbl> 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, 5, 5, 5, 5, 5, …
#> $ age_ring <chr> "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y…
#> $ area <chr> "BT", "BT", "BT", "BT", "BT", "BT", "BT", "BT", "BT", "BT…
#> $ catch_year <dbl> 1977, 1977, 1977, 1977, 1977, 1977, 1977, 1977, 1977, 197…
#> $ cohort <dbl> 1972, 1972, 1972, 1972, 1972, 1972, 1972, 1972, 1972, 197…
#> $ final_length <dbl> 184, 184, 184, 184, 184, 163, 163, 163, 163, 163, 178, 17…
#> $ gear <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
#> $ ID <chr> "1977_1_BT.male.5.NA", "1977_1_BT.male.5.NA", "1977_1_BT.…
#> $ sex <chr> "male", "male", "male", "male", "male", "male", "male", "…
#> $ keep <chr> "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y…
# Use only length-at-age by filtering on age_ring
# Since there is growth after the age ring was formed (+ growth),
# we filter to get lengths that correspond to age rings only
d <- d %>% filter(age_ring == "Y")
#> filter: removed 27,876 rows (8%), 310,584 rows remaining
# age_bc is back-calculated age
# age_catch is age at catchPlot data
All data
ggplot(d, aes(age_bc, length_mm, color = area)) +
geom_jitter(height = 0, alpha = 0.2) +
scale_color_brewer(palette = "Paired")
ggplot(d, aes(age_bc, length_mm, color = area)) +
geom_jitter(height = 0, alpha = 0.2) +
guides(color = "none") +
scale_color_brewer(palette = "Paired") +
facet_wrap(~ area)Sample locations & length of time series
sf::sf_use_s2(FALSE)
#> Spherical geometry (s2) switched off
# Read UTM function
LongLatToUTM <- function(x, y, zone){
xy <- data.frame(ID = 1:length(x), X = x, Y = y)
coordinates(xy) <- c("X", "Y")
proj4string(xy) <- CRS("+proj=longlat +datum=WGS84") ## for example
res <- spTransform(xy, CRS(paste("+proj=utm +zone=",zone," ellps=WGS84",sep='')))
return(as.data.frame(res))
}
# Specify ranges for big map
ymin = 52; ymax = 67; xmin = 11; xmax = 25
map_data <- rnaturalearth::ne_countries(
scale = "medium",
returnclass = "sf", continent = "europe")
# Crop the polygon for plotting and efficiency:
# st_bbox(map_data) # find the rough coordinates
swe_coast <- suppressWarnings(suppressMessages(
st_crop(map_data,
c(xmin = xmin, ymin = ymin, xmax = xmax, ymax = ymax))))
# Transform our map into UTM 33 coordinates, which is the equal-area projection we fit in:
utm_zone33 <- 32633
swe_coast_proj <- sf::st_transform(swe_coast, crs = utm_zone33)
# Add point to areas
sort(unique(d$area))
#> [1] "BS" "BT" "FB" "FM" "HO" "JM" "MU" "RA" "SI_EK"
#> [10] "SI_HA" "TH" "VN"
df <- data.frame(Area = c("Brunskar (BS)", "Biotest (BT)", "Finbo (FB)", "Forsmark (FM)",
"Holmon (HO)", "Kvadofjarden (JM)", "Musko (MU)", "Ranea (RA)",
"Simpevarp 1 (SI_EK", "Simpevarp 2 (SI_HA", "Torhamn (TH)", "Vino (VN)"),
lon = c(21.5, 18.1, 19.5, 18, 20.9, 16.8, 18.1, 22.3, 16.6, 16.7, 15.9, 16.9),
lat = c(60, 60.4, 60.3, 60.5, 63.7, 58, 59, 65.9, 57.3, 57.4, 56.1, 57.5))
# Add UTM coords
utm_coords <- LongLatToUTM(df$lon, df$lat, zone = 33)
df$X <- utm_coords$X
df$Y <- utm_coords$Y
# Crop the plot
xmin2 <- 330000; xmax2 <- 959000; xrange <- xmax - xmin
ymin2 <- 6000000; ymax2 <- 7300000; yrange <- ymax - ymin
ggplot(swe_coast_proj) +
geom_sf() +
coord_sf(xlim = c(xmin2, xmax2), ylim = c(ymin2, ymax2)) +
geom_point(data = df, aes(x = X, y = Y, color = Area), size = 3) +
labs(x = "Longitude", y = "Latitude") +
scale_color_brewer(palette = "Paired")Sample sizes
# Average sample size by ID
d %>%
group_by(cohort, area, ID) %>%
summarise(n = n()) %>%
ungroup() %>%
ggplot(aes(x = n, y = area, n, fill = area)) +
geom_density_ridges(stat = "binline", bins = 20, scale = 1, draw_baseline = FALSE, alpha = 0.8) +
scale_fill_brewer(palette = "Paired") +
guides(fill = "none")
#> group_by: 3 grouping variables (cohort, area, ID)
#> summarise: now 82,554 rows and 4 columns, 2 group variables remaining (cohort, area)
#> ungroup: no grouping variables
# Average sample size by area, cohort and age
d %>%
group_by(cohort, area, age_bc) %>%
summarise(n = n()) %>%
ungroup() %>%
ggplot(aes(cohort, age_bc,color = n, fill = n)) +
facet_wrap(~area) +
geom_tile(shape = 21, alpha = 0.8) +
scale_color_viridis() +
scale_fill_viridis()
#> group_by: 3 grouping variables (cohort, area, age_bc)
#> summarise: now 4,012 rows and 4 columns, 2 group variables remaining (cohort, area)
#> ungroup: no grouping variables
#> Warning in geom_tile(shape = 21, alpha = 0.8): Ignoring unknown parameters:
#> `shape`
# Sample size by gear (some overlapping gears with different names)
d %>%
group_by(gear, area) %>%
summarise(n = n()) %>%
ggplot(aes(factor(gear), n, fill = area)) +
geom_bar(stat = "identity") +
guides(fill = "none") +
facet_wrap(~area, scales = "free") +
scale_fill_brewer(palette = "Paired") +
theme(axis.text.x = element_text(angle = 90))
#> group_by: 2 grouping variables (gear, area)
#> summarise: now 36 rows and 3 columns, one group variable remaining (gear)
# Plot sample size by area and cohort (all length-at-ages)
d %>%
group_by(cohort, area) %>%
summarise(n = n()) %>%
ggplot(aes(cohort, n, fill = area)) +
geom_bar(stat = "identity") +
scale_fill_brewer(palette = "Paired")
#> group_by: 2 grouping variables (cohort, area)
#> summarise: now 473 rows and 3 columns, one group variable remaining (cohort)
d %>%
group_by(cohort, area) %>%
summarise(n = n()) %>%
ggplot(aes(cohort, n, color = area)) +
geom_line() +
scale_color_brewer(palette = "Paired") +
facet_wrap(~area) +
guides(color = "none") +
theme(axis.text.x = element_text(angle = 90))
#> group_by: 2 grouping variables (cohort, area)
#> summarise: now 473 rows and 3 columns, one group variable remaining (cohort)
# Plot sample size by area and catch_year
d %>%
group_by(catch_year, area) %>%
summarise(n = n()) %>%
ggplot(aes(catch_year, n, fill = area)) +
geom_bar(stat = "identity") +
scale_fill_brewer(palette = "Paired")
#> group_by: 2 grouping variables (catch_year, area)
#> summarise: now 340 rows and 3 columns, one group variable remaining (catch_year)
d %>%
group_by(catch_year, area) %>%
summarise(n = n()) %>%
ggplot(aes(catch_year, n, color = area)) +
geom_line() +
scale_color_brewer(palette = "Paired") +
facet_wrap(~area) +
guides(color = "none") +
theme(axis.text.x = element_text(angle = 90))
#> group_by: 2 grouping variables (catch_year, area)
#> summarise: now 340 rows and 3 columns, one group variable remaining (catch_year)Trends in length-at-age
# All ages
d %>%
group_by(age_bc, area) %>%
ggplot(aes(catch_year, length_mm, color = area)) +
geom_point(size = 0.1, alpha = 0.5) +
facet_wrap(~age_bc) +
scale_color_brewer(palette = "Paired", name = "Area") +
theme(axis.text.x = element_text(angle = 90),
legend.position = "bottom") +
guides(color = guide_legend(override.aes = list(size = 2)))
#> group_by: 2 grouping variables (age_bc, area)
# Filter common ages
d %>%
group_by(age_bc, area) %>%
filter(age_bc < 13) %>%
ggplot(aes(catch_year, length_mm, color = factor(age_bc))) +
geom_point(size = 0.1, alpha = 0.5) +
facet_wrap(~area, ncol = 6) +
scale_color_brewer(palette = "Paired", name = "Age") +
theme(axis.text.x = element_text(angle = 90),
legend.position = "bottom") +
guides(color = guide_legend(override.aes = list(size = 2)))
#> group_by: 2 grouping variables (age_bc, area)
#> filter (grouped): removed 238 rows (<1%), 310,346 rows remaining
# Age-area grid
d %>%
group_by(age_bc, area) %>%
filter(age_bc < 7) %>%
ggplot(aes(catch_year, length_mm, color = factor(age_bc))) +
geom_point(size = 0.1, alpha = 0.5) +
stat_smooth(aes(catch_year, length_mm, group = factor(age_bc)),
se = F, formula = y ~ s(x, k = 3), color = "grey30") +
facet_grid(age_bc~area) +
scale_color_brewer(palette = "Paired", name = "Age") +
theme(axis.text.x = element_text(angle = 90),
legend.position = "bottom") +
guides(color = guide_legend(override.aes = list(size = 2)))
#> group_by: 2 grouping variables (age_bc, area)
#> filter (grouped): removed 14,268 rows (5%), 296,316 rows remaining
#> `geom_smooth()` using method = 'gam'Time-slopes of length-at-age
# Calculate time-slopes by age and area
time_slopes_by_year_area <- d %>%
group_by(age_bc, area) %>% # center length at age for comparison across ages
mutate(length_mm_ct = length_mm / mean(length_mm)) %>%
ungroup() %>%
mutate(id = paste(age_bc, area, sep = ";")) %>%
split(.$id) %>%
purrr::map(~lm(length_mm_ct ~ catch_year, data = .x)) %>%
purrr::map_df(broom::tidy, .id = 'id') %>%
filter(term == 'catch_year') %>%
separate(id, into = c("age_bc", "area"), sep = ";") %>%
mutate(upr = estimate + std.error*2, lwr = estimate - std.error*2) %>%
mutate(id = paste(age_bc, area, sep = ";"))
#> group_by: 2 grouping variables (age_bc, area)
#> mutate (grouped): new variable 'length_mm_ct' (double) with 34,430 unique values and 0% NA
#> ungroup: no grouping variables
#> mutate: new variable 'id' (character) with 167 unique values and 0% NA
#> filter: removed 167 rows (50%), 167 rows remaining
#> mutate: new variable 'upr' (double) with 154 unique values and 9% NA
#> new variable 'lwr' (double) with 154 unique values and 9% NA
#> mutate: new variable 'id' (character) with 167 unique values and 0% NA
time_slopes_by_year_area
#> # A tibble: 167 × 10
#> age_bc area term estimate std.error statistic p.value upr lwr
#> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 1 BS catch_ye… 0.00474 0.000451 10.5 1.53e- 25 0.00564 0.00384
#> 2 1 BT catch_ye… 0.0169 0.000294 57.4 0 0.0175 0.0163
#> 3 1 FB catch_ye… 0.00444 0.0000989 44.9 0 0.00464 0.00424
#> 4 1 FM catch_ye… 0.00370 0.000128 28.9 1.99e-176 0.00396 0.00344
#> 5 1 HO catch_ye… 0.00463 0.000186 24.9 6.21e-132 0.00501 0.00426
#> 6 1 JM catch_ye… 0.00429 0.0000808 53.1 0 0.00445 0.00413
#> 7 1 MU catch_ye… 0.00762 0.000489 15.6 2.50e- 53 0.00860 0.00665
#> 8 1 RA catch_ye… 0.00285 0.000636 4.48 7.93e- 6 0.00412 0.00158
#> 9 1 SI_EK catch_ye… 0.00581 0.000171 34.0 4.94e-231 0.00616 0.00547
#> 10 1 SI_HA catch_ye… 0.00480 0.000148 32.5 9.65e-217 0.00510 0.00451
#> # ℹ 157 more rows
#> # ℹ 1 more variable: id <chr>
# Add sample size so that we can filter on that
sample_size <- d %>%
group_by(age_bc, area) %>%
summarise(n = n()) %>%
ungroup() %>%
mutate(id = paste(age_bc, area, sep = ";")) %>%
dplyr::select(n, id)
#> group_by: 2 grouping variables (age_bc, area)
#> summarise: now 167 rows and 3 columns, one group variable remaining (age_bc)
#> ungroup: no grouping variables
#> mutate: new variable 'id' (character) with 167 unique values and 0% NA
# Join sample size
time_slopes_by_year_area <- left_join(time_slopes_by_year_area, sample_size)
#> Joining with `by = join_by(id)`
#> left_join: added one column (n)
#> > rows only in x 0
#> > rows only in y ( 0)
#> > matched rows 167
#> > =====
#> > rows total 167
# Plot effect sizes
time_slopes_by_year_area %>%
filter(n > 30) %>%
ggplot(aes(reorder(age_bc, as.numeric(age_bc)), estimate, color = factor(area))) +
geom_point(position = position_dodge(width = 0.4)) +
geom_errorbar(aes(ymin = lwr, ymax = upr), width = 0.2,
position = position_dodge(width = 0.4)) +
geom_hline(yintercept = 0, linetype = "dashed") +
scale_color_brewer(palette = "Paired") +
facet_wrap(~factor(area), scales = "free") +
labs(x = "Age", y = "slope: size~time") +
theme(legend.position = "bottom")
#> filter: removed 53 rows (32%), 114 rows remaining
time_slopes_by_year_area %>%
mutate(age_bc=as.numeric(age_bc)) %>%
filter(n > 30) %>%
ggplot(aes(area,estimate,color = factor(age_bc))) +
geom_point(position = position_dodge(width = 0.4)) +
geom_errorbar(aes(ymin = lwr, ymax = upr), width = 0.2,
position = position_dodge(width = 0.4)) +
geom_hline(yintercept = 0, linetype = "dashed") +
scale_color_brewer(palette = "Paired") +
facet_wrap(~factor(age_bc), scales = "free") +
labs(x = "Age", y = "slope: size~time") +
theme(legend.position = "bottom")
#> mutate: converted 'age_bc' from character to double (0 new NA)
#> filter: removed 53 rows (32%), 114 rows remaining
#> Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Paired is 12
#> Returning the palette you asked for with that many colors
#> Warning: Removed 1 rows containing missing values (`geom_point()`).
time_slopes_by_year_area %>%
filter(n > 30) %>%
ggplot(aes(reorder(age_bc, as.numeric(age_bc)), estimate, color = factor(area))) +
geom_point(position = position_dodge(width = 0.4)) +
geom_errorbar(aes(ymin = lwr, ymax = upr), width = 0.2,
position = position_dodge(width = 0.4)) +
geom_hline(yintercept = 0, linetype = "dashed") +
scale_color_brewer(palette = "Paired") +
labs(x = "Age", y = "slope: size~time")
#> filter: removed 53 rows (32%), 114 rows remaining
time_slopes_by_year_area %>%
filter(n > 30) %>%
ggplot(aes(as.numeric(age_bc), estimate)) +
geom_point(position = position_dodge(width = 0.4)) +
geom_hline(yintercept = 0, linetype = "dashed") +
stat_smooth(method = "gam", formula = y ~ s(x, k = 3)) +
labs(x = "Age", y = "slope: size~time")
#> filter: removed 53 rows (32%), 114 rows remaining